########################
## Function to convert a
## dictionary to a pandas DF.
########################
def dictTOdf(results):
    """
    Build a per-model metrics table from a results dictionary.

    Parameters:
    results (dict): Must provide the keys "Acc", "Bal_acc", "F1", "recall",
        "precision", "average_precision" and "roc_auc" (one value per model)
        plus "model", whose values become the row index.

    Returns:
    df (DataFrame): One row per model with the columns 'Accuracy',
        'Balanced Acc', 'F1', 'Recall', 'Precision', 'Avg precision' and
        'roc_auc', sorted by ascending 'F1'.
    """
    # (output column label, key in the results dictionary), in column order.
    column_sources = (
        ('Accuracy', "Acc"),
        ('Balanced Acc', "Bal_acc"),
        ('F1', "F1"),
        ('Recall', "recall"),
        ('Precision', "precision"),
        ('Avg precision', "average_precision"),
        ('roc_auc', "roc_auc"),
    )
    table = {label: results[key] for label, key in column_sources}
    metrics = pd.DataFrame(table, index=results["model"])
    return metrics.sort_values(by='F1')
########################
## Function to generate
## figures 1A, 1B, 2A, 2B, S4A
########################
def plotSTD(results_whole, fileName):
    """
    Plot every model's mean metric score with a standard-deviation error bar,
    together with its MLcps on a secondary y-axis.

    Parameters:
    results_whole (dict): Results dictionary in the layout accepted by dictTOdf.
    fileName (str): Base name of the image file. Currently unused because the
        export call at the end of the function is commented out; kept so the
        signature stays compatible with existing callers.

    Returns:
    Figure: Plotly figure. Models are added in ascending order of the standard
        deviation of their metric scores.
    """
    metrics_df = dictTOdf(results_whole)
    cps_scores = calculate(metrics_df)

    model_names = list(metrics_df.index)
    mean_scores = [round(value, 3) for value in metrics_df.mean(axis=1)]
    sd_scores = list(metrics_df.std(axis=1))

    # Attach each MLcps to its model by name rather than by row position, so the
    # pairing stays correct even if calculate() returns its rows in a different
    # order than the input (stdPlotNew resolves scores by name as well).
    # Assumes the 'Algorithms' column holds the input index labels.
    score_by_model = {}
    for name, score in zip(cps_scores['Algorithms'], cps_scores['Score']):
        score_by_model.setdefault(name, score)  # first occurrence wins
    mlcps_score = [round(score_by_model[name], 4) for name in model_names]

    # Order everything by ascending standard deviation (ties fall back to the
    # remaining tuple fields). Building the lists row by row also keeps an
    # empty input from failing on tuple unpacking.
    ordered = sorted(zip(sd_scores, mean_scores, model_names, mlcps_score))
    sd_scores = [row[0] for row in ordered]
    mean_scores = [row[1] for row in ordered]
    model_names = [row[2] for row in ordered]
    mlcps_score = [row[3] for row in ordered]

    fig = make_subplots(rows=1, cols=1,
                        x_title='<b>Models</b>',
                        specs=[[{"secondary_y": True}]])

    # One trace per model: an invisible marker at the mean that only carries
    # the +/- SD error bar.
    for name, mean_value, sd_value in zip(model_names, mean_scores, sd_scores):
        fig.add_trace(go.Scatter(
            x=[name],
            y=[mean_value],
            mode='markers+text',
            textposition='top center',
            error_y=dict(
                type='data',
                color="black",
                array=[sd_value],
                visible=True),
            marker=dict(color='rgba(0,0,0,0)', size=8),
            showlegend=False,
        ), secondary_y=False, row=1, col=1)

    # MLcps of every model, labelled with the value rounded to 3 decimals.
    fig.add_trace(go.Scatter(
        x=model_names,
        y=mlcps_score,
        text=np.round(mlcps_score, 3),
        mode='markers+text',
        textposition='top right',
        name="MLcps",
        marker=dict(color="#C82F02", size=6),
    ), secondary_y=True)

    fig.update_yaxes(title_text="<b>Metrics Score (Standard Deviation)</b>", secondary_y=False)
    fig.update_yaxes(title_text="<b>MLcps</b>", secondary_y=True)
    fig.layout.template = "plotly_white"
    # Export is disabled; re-enable to write the figure to disk:
    # fig.write_image(fileName + ".png", scale=5, width=1000)
    return fig
########################
## Function to generate
## figures 1C, 1D
########################
def plotCPS_bar_train(results_whole, fileName):
    """
    Draw a horizontal bar plot of the training MLcps of every model.

    Each bar is labelled with the score (six decimal places) and the model's
    rank, where rank 1 is the highest score. Among equal scores the model that
    appears later in the score table receives the better rank.

    Parameters:
    results_whole (dict): Results dictionary in the layout accepted by dictTOdf.
    fileName (str): Base name of the image file. Currently unused because the
        export call at the end of the function is commented out; kept so the
        signature stays compatible with existing callers.

    Returns:
    Figure: Plotly figure with one "Training" bar per model.
    """
    # Calculate MLcps for the training results.
    cps_train = calculate(dictTOdf(results_whole))
    models = cps_train['Algorithms']
    train_scores = cps_train['Score']

    # Work on a plain list so that integer subscripts are always positional.
    # Subscripting a pandas Series with an int is label-based (or a deprecated
    # positional fallback), which depends on the index calculate() returns.
    scores = list(train_scores)

    # Positions ordered from best to worst score; the position itself is the
    # tie-breaker, exactly as in the (score, index) sort key.
    best_first = sorted(range(len(scores)),
                        key=lambda pos: (scores[pos], pos),
                        reverse=True)
    rank_of = {pos: rank for rank, pos in enumerate(best_first, start=1)}

    # Bar labels: score with six decimals followed by the rank in bold.
    labels = [
        f"{score:.6f} ( Rank <b><span ';'>{rank_of[pos]}</span> </b>)"
        for pos, score in enumerate(scores)
    ]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        y=models,
        x=train_scores,
        text=labels,
        legendgroup="Training",
        orientation='h',
        name="Training",
        textposition="auto",
        marker=dict(color='lightblue', line=dict(width=0.8, color='black'))
    ))

    # Customize the layout
    fig.update_layout(
        title=None,
        xaxis_title="<b>MLcps</b>",
        yaxis_title="<b>Models</b>",
        barmode='group',
        template="plotly_white",
        legend_traceorder="reversed",
        legend=dict(x=0.95, y=1),
        width=600, height=900,
        yaxis=dict(title_standoff=0),
        font=dict(size=16)
    )
    fig.update_layout(yaxis=dict(tickfont=dict(size=18), title_font=dict(size=18)))
    fig.update_layout(xaxis=dict(tickfont=dict(size=18), title_font=dict(size=18)))
    # Export is disabled; re-enable to write the figure to disk:
    # fig.write_image(fileName + ".png", scale=5)
    return fig
########################
## Function to generate
## figures 2C, 2D, S4B
########################
def plotCPS_bar_test_train(results_whole, results_test, fileName):
    """
    Draw grouped horizontal bars comparing the test and training MLcps of every model.

    MLcps is calculated separately for both result sets and the two score
    tables are inner-joined on 'Algorithms', so only models present in both
    appear. Each bar is labelled with the score (six decimal places) and the
    model's rank within its own group (rank 1 = highest score; among equal
    scores the model that appears earlier in the merged table ranks better).

    Parameters:
    results_whole (dict): Training results in the layout accepted by dictTOdf.
    results_test (dict): Test results in the layout accepted by dictTOdf.
    fileName (str): Base name of the image file. Currently unused because the
        export call at the end of the function is commented out; kept so the
        signature stays compatible with existing callers.

    Returns:
    Figure: Plotly figure with a "Test" and a "Training" bar per model.
    """
    # Calculate MLcps for train and test results and merge them in a single df.
    cps_train = calculate(dictTOdf(results_whole))
    cps_test = calculate(dictTOdf(results_test))
    merged_df = cps_train.merge(cps_test, on='Algorithms', how='inner')
    models = merged_df['Algorithms']

    # (legend name, score column, bar colour); after the merge 'Score_x' holds
    # the training scores and 'Score_y' the test scores.
    groups = (
        ("Test", merged_df['Score_y'], 'lightcoral'),
        ("Training", merged_df['Score_x'], 'lightblue'),
    )

    fig = go.Figure()
    for group_name, score_column, bar_color in groups:
        # Plain list so that integer subscripts are always positional.
        scores = list(score_column)

        # Positions ordered from best to worst score; the sort is stable, so
        # equal scores keep their original relative order.
        best_first = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
        rank_of = {pos: rank for rank, pos in enumerate(best_first, start=1)}

        # Bar labels: score with six decimals followed by the rank in bold.
        labels = [
            f"{score:.6f} ( Rank <b><span ';'>{rank_of[pos]}</span> </b>)"
            for pos, score in enumerate(scores)
        ]

        fig.add_trace(go.Bar(
            y=models,
            x=score_column,
            text=labels,
            legendgroup=group_name,
            textposition='auto',
            orientation='h',
            name=group_name,
            marker=dict(color=bar_color, line=dict(width=0.8, color='black'))
        ))

    # Customize the layout
    fig.update_layout(
        title=None,
        xaxis_title="<b>MLcps</b>",
        yaxis_title="<b>Models</b>",
        barmode='group',
        template="plotly_white",
        legend_traceorder="reversed",
        legend=dict(x=0.95, y=1),
        width=600, height=900,
        yaxis=dict(title_standoff=0),
        font=dict(size=16)
    )
    fig.update_layout(yaxis=dict(tickfont=dict(size=18), title_font=dict(size=18)))
    fig.update_layout(xaxis=dict(tickfont=dict(size=18), title_font=dict(size=18)))
    # Export is disabled; re-enable to write the figure to disk:
    # fig.write_image(fileName + ".png", scale=5)
    return fig
########################
## Function to generate
## figure S4A
########################
def bar_new(results_whole, results_test, fileName):
    """
    Draw grouped horizontal bars comparing test and training MLcps for the
    body signal dataset.

    Index labels are shortened to the text before the first '-' before MLcps
    is calculated. The shortening is done on copies, so the DataFrames passed
    in by the caller are left unchanged. The two score tables are inner-joined
    on 'Algorithms'; each bar is labelled with the score (six decimal places)
    and the model's rank within its group (rank 1 = highest score; among equal
    scores the model that appears earlier in the merged table ranks better).

    Parameters:
    results_whole (DataFrame): Training metrics, one row per model, with a
        string index of model names.
    results_test (DataFrame): Test metrics in the same layout.
    fileName (str): Base name of the image file. Currently unused because the
        export call at the end of the function is commented out; kept so the
        signature stays compatible with existing callers.

    Returns:
    Figure: Plotly figure with a "Test" and a "Training" bar per model.
    """
    # Shorten the model names on copies instead of overwriting the index of
    # the caller's DataFrames.
    train_df = results_whole.copy()
    test_df = results_test.copy()
    train_df.index = train_df.index.str.split('-').str[0]
    test_df.index = test_df.index.str.split('-').str[0]

    # Calculate MLcps for both sets and keep the models present in both.
    cps_train = calculate(train_df)
    cps_test = calculate(test_df)
    merged_df = cps_train.merge(cps_test, on='Algorithms', how='inner')
    models = merged_df['Algorithms']

    # (legend name, score column, bar colour); after the merge 'Score_x' holds
    # the training scores and 'Score_y' the test scores.
    groups = (
        ("Test", merged_df['Score_y'], 'lightcoral'),
        ("Training", merged_df['Score_x'], 'lightblue'),
    )

    fig = go.Figure()
    for group_name, score_column, bar_color in groups:
        # Plain list so that integer subscripts are always positional.
        scores = list(score_column)

        # Positions ordered from best to worst score; the sort is stable, so
        # equal scores keep their original relative order.
        best_first = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
        rank_of = {pos: rank for rank, pos in enumerate(best_first, start=1)}

        # Bar labels: score with six decimals followed by the rank in bold.
        labels = [
            f"{score:.6f} ( Rank <b><span ';'>{rank_of[pos]}</span> </b>)"
            for pos, score in enumerate(scores)
        ]

        fig.add_trace(go.Bar(
            y=models,
            x=score_column,
            text=labels,
            legendgroup=group_name,
            textposition='auto',
            orientation='h',
            name=group_name,
            marker=dict(color=bar_color, line=dict(width=0.8, color='black'))
        ))

    # Customize the layout
    fig.update_layout(
        title=None,
        xaxis_title="<b>MLcps</b>",
        yaxis_title="<b>Models</b>",
        barmode='group',
        template="plotly_white",
        legend_traceorder="reversed",
        legend=dict(x=0.95, y=1),
        width=700, height=900,
        yaxis=dict(title_standoff=0),
        font=dict(size=16)
    )
    fig.update_layout(yaxis=dict(tickfont=dict(size=18), title_font=dict(size=18)))
    fig.update_layout(xaxis=dict(tickfont=dict(size=18), title_font=dict(size=18)))
    # Export is disabled; re-enable to write the figure to disk:
    # fig.write_image(fileName + ".png", scale=5)
    return fig
########################
## Function to generate
## figure S4B
########################
def stdPlotNew(results_whole_df, fileName):
    """
    Plot every model's mean metric score with a standard-deviation error bar,
    together with its MLcps on a secondary y-axis, for the body signal dataset.

    Index labels are shortened to the text before the first '-'. The
    shortening is done on a copy, so the DataFrame passed in by the caller is
    left unchanged. Mean and standard deviation are taken across all columns
    of each row.

    Parameters:
    results_whole_df (DataFrame): Metrics, one row per model, with a string
        index of model names. Must contain an 'f1' column, which is used for
        the initial row ordering.
    fileName (str): Base name of the image file. Currently unused because the
        export call at the end of the function is commented out; kept so the
        signature stays compatible with existing callers.

    Returns:
    Figure: Plotly figure. Models are added in descending order of the
        standard deviation of their metric scores.
    """
    # Shorten the model names on a copy instead of overwriting the index of
    # the caller's DataFrame.
    metrics_df = results_whole_df.copy()
    metrics_df.index = metrics_df.index.str.split('-').str[0]
    metrics_df = metrics_df.sort_values('f1')

    model_names = list(metrics_df.index)
    mean_scores = [round(value, 3) for value in metrics_df.mean(axis=1)]
    sd_scores = list(metrics_df.std(axis=1))

    # Resolve each model's MLcps by name; the first matching row is used.
    cps_scores = calculate(metrics_df)
    mlcps_score = []
    for name in model_names:
        matching = cps_scores[cps_scores['Algorithms'] == name]['Score'].values
        mlcps_score.append(round(matching[0], 4))

    # Order everything by descending standard deviation. The sort is stable,
    # so models with equal SD keep their 'f1' ordering. Building the lists row
    # by row also keeps an empty input from failing on tuple unpacking.
    ordered = sorted(zip(sd_scores, mean_scores, model_names, mlcps_score),
                     key=lambda row: row[0], reverse=True)
    sd_scores = [row[0] for row in ordered]
    mean_scores = [row[1] for row in ordered]
    model_names = [row[2] for row in ordered]
    mlcps_score = [row[3] for row in ordered]

    fig = make_subplots(rows=1, cols=1,
                        x_title='<b>Models</b>',
                        specs=[[{"secondary_y": True}]])

    # One trace per model: an invisible marker at the mean that only carries
    # the +/- SD error bar.
    for name, mean_value, sd_value in zip(model_names, mean_scores, sd_scores):
        fig.add_trace(go.Scatter(
            x=[name],
            y=[mean_value],
            mode='markers+text',
            textposition='top center',
            error_y=dict(
                type='data',
                color="black",
                array=[sd_value],
                visible=True),
            marker=dict(color='rgba(0,0,0,0)', size=8),
            showlegend=False,
        ), secondary_y=False, row=1, col=1)

    # MLcps of every model, labelled with the value rounded to 3 decimals.
    fig.add_trace(go.Scatter(
        x=model_names,
        y=mlcps_score,
        text=np.round(mlcps_score, 3),
        mode='markers+text',
        textposition='top right',
        name="MLcps",
        marker=dict(color="#C82F02", size=6),
    ), secondary_y=True)

    fig.update_yaxes(title_text="<b>Metrics Score (Standard Deviation)</b>", secondary_y=False)
    fig.update_yaxes(title_text="<b>MLcps</b>", secondary_y=True)
    fig.layout.template = "plotly_white"
    # Export is disabled; re-enable to write the figure to disk:
    # fig.write_image(fileName + ".png", scale=5, width=1100)
    return fig